{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8197732-8792-4800-b5d4-3239207d14ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "using Pkg\n",
    "using JuMP\n",
    "using Ipopt\n",
    "using Distributions\n",
    "using Plots\n",
    "using Dierckx\n",
    "using CSV\n",
    "using DataFrames\n",
    "using KernelDensity\n",
    "using StatsBase\n",
    "using Roots\n",
    "using GaussianDistributions\n",
    "using LaTeXStrings\n",
    "default(legendfontsize = 11, guidefont = (12, :black), tickfont = (12, :black), yguidefontrotation=0, xguidefontrotation=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "957fa279-32f5-49e6-9ca2-2eb1df64b0f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "#set parameters\n",
    "#gm = cost function parameter\n",
    "#tau = progressivity of HSV taxes, married\n",
    "#lm = average level of HSV taxes, married\n",
    "#tau2 = progressivity of HSV taxes, single\n",
    "#lm2 = average level of HSV taxes, single\n",
    "#aw = tail parameter of PLN cdf\n",
    "#sgw = shape parameter of PLN cdf\n",
    "#muw = location parameter of PLN cdf\n",
    "#rho_gauss = correlation parameter of Gaussian copula\n",
    "gm = 4\n",
    "tau = 0.06\n",
    "lm = 0.913 * (53/100)^tau\n",
    "tau2 = 0.034\n",
    "lm2 = 0.897 * (53/100)^tau2\n",
    "aw =  2.95\n",
    "muw = -0.71\n",
    "sgw = 0.40\n",
    "rho_gauss = 0.33\n",
    "\n",
    "#load & symmetrize dataset\n",
    "#load dataset\n",
    "#column 1, df2 = weight\n",
    "#column 2, df2 = spouse 1's earnings\n",
    "#column 3, df2 = spouse 2's earnings\n",
    "#column 4, df2 = total earnings\n",
    "#column 5, df2 = share of spouse 1's earnings\n",
    "df = CSV.read(\"../data/sample.csv\", DataFrame)\n",
    "sort!(df,[:pair])\n",
    "df1 = [df[:,1] df[:,3]/100000]\n",
    "numdf = size(df1)[1]\n",
    "for i=1:2:numdf\n",
    "    df1 = [df1; df1[i+1,:]'; df1[i,:]']\n",
    "end\n",
    "df1[:,1] = df1[:,1]/sum(df1[:,1])\n",
    "df2 = [df1[1:2:end,:] df1[2:2:end,:]]\n",
    "df2 = [df2[:,1:2] df2[:,4]]\n",
    "df2 = [df2 df2[:,2]+df2[:,3] df2[:,2]./(df2[:,2]+df2[:,3])]\n",
    "\n",
    "#compute earnings distribution and marginal taxes implied by HSV taxes\n",
    "#uniq_indy = mass of agents with lower individual income, sorted\n",
    "#uniq_toty = mass of agents with lower family income, sorted\n",
    "uniq_indy = unique(df2[:,2])\n",
    "uniq_indy = [uniq_indy [2*sum((df2[:,2] .<= y) .* df2[:,1]) for y in uniq_indy]]\n",
    "uniq_indy = uniq_indy[sortperm(uniq_indy[:, 1]), :]\n",
    "uniq_toty = unique(df2[:,4])\n",
    "uniq_toty = [uniq_toty [2*sum((df2[:,4] .<= y) .* df2[:,1]) for y in uniq_toty]]\n",
    "uniq_toty = uniq_toty[sortperm(uniq_toty[:, 1]), :]\n",
    "ygr = 0.05:3.5/1000:3.5 #grid of earnings in data\n",
    "nyt = length(ygr);\n",
    "mtaxf(y) = 1 - lm*(1-tau)*y^(-tau) #marginal taxes on married implied by HSV taxes\n",
    "mtaxf2(y) = 1 - lm2*(1-tau2)*y^(-tau2) #marginal taxes on single implied by HSV taxes\n",
    "\n",
    "#compute densities and pareto weights\n",
    "#pdf1 = marginal density\n",
    "#alpha1 = Pareto weights on singles\n",
    "#pdf2 = joint density\n",
    "#alpha2 = Pareto weights on couples\n",
    "#set geometric grid of individual productivities\n",
    "wmin = 0.12;\n",
    "wmax = 10;\n",
    "nx = 400;\n",
    "wgr = [wmin*(wmax/wmin)^((i-1)/(nx-1)) for i=1:nx]\n",
    "delta = 1 .- (wgr[1:end-1] ./wgr[2:end]).^gm\n",
    "m = 0.35\n",
    "alphaf(w) = exp(-m*w^(gm/(gm-1))) - exp(-m*wmax^(gm/(gm-1)))\n",
    "function cdfpln(t,a,mu,sg)\n",
    "    return cdf(Normal(),(log(t)-mu)/sg)-exp(a*mu+a^2*sg^2/2)*t^(-a)*cdf(Normal(),(log(t)-mu-a*sg^2)/sg);\n",
    "end\n",
    "cdf1 = cdfpln.(wgr[1:end-1],aw,muw,sgw);\n",
    "pdf1 = diff([0; cdf1; 1])   \n",
    "alpha1 = alphaf.(wgr)\n",
    "cdf2 = [cdf(Gaussian([0, 0], [1 rho_gauss; rho_gauss 1]), [quantile(Normal(), cdfpln(w1,aw,muw,sgw)), quantile(Normal(), cdfpln(w2,aw,muw,sgw))]) for w1 in wgr[1:end-1], w2 in wgr[1:end-1]]\n",
    "pdf2 = diff([zeros(1,nx); diff([zeros(nx,1) [cdf2 cdf1; cdf1' 1]], dims=2)], dims=1) \n",
    "pdf2N = pdf1 * pdf1' #pmdf with independent productifies\n",
    "alpha2 = alphaf.(wgr) .+ alphaf.(wgr)'\n",
    "\n",
    "#function that solves discretized relaxed problem in 1d taking 1d pareto weights (alpha1) and probability mass function (pdf1) as inputs \n",
    "#y1d1 = earnings\n",
    "#lm1d1 = distortions\n",
    "#lm1d1 ./ (1 .+ lm1d1) = marginal taxes\n",
    "#v1d = value\n",
    "function comp1d(alpha1,pdf1)\n",
    "    alpha1 = alpha1/sum(alpha1 .* pdf1) #normalize Pareto weights\n",
    "    model1 = Model(optimizer_with_attributes(Ipopt.Optimizer))\n",
    "    @variable(model1, y1[1:nx] >= 0) #earnings \n",
    "    @variable(model1, v[1:nx,1:nx]) #value\n",
    "    @constraint(model1, v[1,1] == 0)\n",
    "    @NLconstraint(model1, ic1[i=2:nx], v[i] >= v[i-1] + (y1[i-1]/wgr[i-1])^gm/gm*delta[i-1]) #downward ic i->i-1\n",
    "    @NLobjective(model1, Max, (sum((alpha1[i]-1)*pdf1[i]*v[i] for i=1:nx) + sum((y1[i]-(y1[i]/wgr[i])^gm/gm)*pdf1[i] for i=1:nx))/pdf1[1]) #objective to maximize\n",
    "    #set_optimizer_attribute(model1, \"print_level\",1)    \n",
    "    set_optimizer_attribute(model1, \"max_iter\", 200)\n",
    "    #set_optimizer_attribute(model1, \"max_cpu_time\", 600)\n",
    "    set_optimizer_attribute(model1, \"constr_viol_tol\", 1e-12)\n",
    "    set_optimizer_attribute(model1, \"acceptable_tol\", 1e-12)\n",
    "    set_optimizer_attribute(model1, \"tol\", 1e-12)\n",
    "    optimize!(model1)\n",
    "    y1d1 = zeros(nx)\n",
    "    v1d = zeros(nx)\n",
    "    for i=1:nx\n",
    "        y1d1[i] = value(y1[i])\n",
    "        v1d[i] = value(v[i])        \n",
    "    end\n",
    "    lm1d1 =  (wgr).^gm ./(y1d1).^(gm-1) .-1 #compute optimal distortions lambda   \n",
    "    m1d1 = lm1d1 ./ (1 .+ lm1d1)\n",
    "return y1d1, lm1d1, m1d1, v1d\n",
    "end\n",
    "\n",
    "#function that solves discretized relaxed problem in 2d taking 2d pareto weights (alpha2) and probability mass function (pdf2) as inputs\n",
    "#y2d1 = spouse 1's earnings\n",
    "#y2d2 = spouse 2's earnings\n",
    "#lm1d1 = spouse 1's distortions\n",
    "#lm1d2 = spouse 2's distortions\n",
    "#lm2d1 ./ (1 .+ lm2d1) = spouse 1's taxes\n",
    "#lm2d2 ./ (1 .+ lm2d2) = spouse 2's taxes\n",
    "#v2d = value\n",
    "function comp2d(alpha2,pdf2)\n",
    "    alpha2 = alpha2/sum(alpha2 .* pdf2)  #normalize pareto weights\n",
    "    model2 = Model(optimizer_with_attributes(Ipopt.Optimizer))\n",
    "    @variable(model2, y1[1:nx,1:nx] >= 0) #spouse 1's earnings \n",
    "    @variable(model2, y2[1:nx,1:nx] >= 0); #spouse 2's earnings\n",
    "    @variable(model2, v[1:nx,1:nx]) #value\n",
    "    @constraint(model2, v[1,1] == 0)\n",
    "    @NLconstraint(model2, ic1[i=2:nx,j=1:nx], v[i,j] >= v[i-1,j] + (y1[i-1,j]/wgr[i-1])^gm/gm*delta[i-1]) #downward ic (i,j)->(i-1,j)\n",
    "    @NLconstraint(model2, ic2[i=2:nx,j=1:nx], v[j,i] >= v[j,i-1] + (y2[j,i-1]/wgr[i-1])^gm/gm*delta[i-1]) #downward ic (j,i)->(j,i-1)\n",
    "    @NLobjective(model2, Max, (sum((alpha2[i,j]-1)*pdf2[i,j]*v[i,j] for i=1:nx for j=1:nx) +\n",
    "        sum((y1[i,j]-(y1[i,j]/wgr[i])^gm/gm)*pdf2[i,j] for i=1:nx for j=1:nx) + \n",
    "        sum((y2[i,j]-(y2[i,j]/wgr[j])^gm/gm)*pdf2[i,j] for i=1:nx for j=1:nx))/pdf2[1,1]) #objective to maximize\n",
    "    set_optimizer_attribute(model2, \"max_iter\", 200)\n",
    "    set_optimizer_attribute(model2, \"constr_viol_tol\", 1e-12)\n",
    "    set_optimizer_attribute(model2, \"acceptable_tol\", 1e-12)\n",
    "    set_optimizer_attribute(model2, \"tol\", 1e-12)\n",
    "    optimize!(model2)\n",
    "    y2d1 = zeros(nx,nx)\n",
    "    y2d2 = zeros(nx,nx)\n",
    "    v2d = zeros(nx,nx)\n",
    "    for i=1:nx\n",
    "        for j=1:nx\n",
    "            y2d1[i,j] = value(y1[i,j]);\n",
    "            y2d2[i,j] = value(y2[i,j]);\n",
    "            v2d[i,j] = value(v[i,j]);                \n",
    "        end\n",
    "    end\n",
    "    lm2d1 =  (wgr).^gm ./(y2d1).^(gm-1) .-1\n",
    "    lm2d2 =  (wgr').^gm ./(y2d2).^(gm-1) .-1\n",
    "    m2d1 = lm2d1 ./ (1 .+ lm2d1)\n",
    "    m2d2 = lm2d2 ./ (1 .+ lm2d2)\n",
    "return y2d1, y2d2, lm2d1, lm2d2, m2d1, m2d2, v2d\n",
    "end\n",
    "\n",
    "#compute 1d and 2d models\n",
    "y1d1, lm1d1, m1d1, v1d = comp1d(alpha1, pdf1)\n",
    "y2d1, y2d2, lm2d1, lm2d2, m2d1, m2d2, v2d = comp2d(alpha2, pdf2)\n",
    "y2d1N, y2d2N, lm2d1N, lm2d2N, m2d1N, m2d2N, v2dN = comp2d(alpha2, pdf2N)\n",
    "\n",
    "#compute earnings distribution implied by optimal taxes\n",
    "fy(y) = sum(pdf2[2:nx-1,2:nx-1] .* (y2d1[2:nx-1,2:nx-1] .<= y))\n",
    "ygropt = minimum(y2d1):(maximum(y2d1)-minimum(y2d1))/1000:maximum(y2d1) #grid of optimal individual earnings in 2d \n",
    "nytopt = length(ygropt) \n",
    "ecdfy2d1opt = [fy(y) for y in ygropt]\n",
    "spl = Spline2D(vec(y2d1[2:nx-1,2:nx-1]), vec(y2d2[2:nx-1,2:nx-1]),vec(m2d1[2:nx-1,2:nx-1]), kx= 1, ky= 1, s= 0.01) #linear spline interpolater to approximate individual productivities at given percentile\n",
    "qygr = fy.(ygropt); #quantile grid of optimal individual earnings in 2d \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9405e57d-bebf-4663-ad30-d3cb8641f262",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 5.(a)\n",
    "#plot spouse i marginal taxes at 0.1, 0.5 and 0.9 percentiles of spouse -i earnings\n",
    "plot(y1d1[2:end]*100, m1d1[2:end], label=\"single\", line = (2.75,:purple,:dot))\n",
    "plot!(ygropt[2:nytopt]*100, [evaluate(spl, y1,  ygropt[findlast(qygr .<= 0.1)]) for y1 in ygropt[2:nytopt]],  label = \"-i's earnings percentile = 10\", line = (2.75,:red, :dashdot), xlabel = \"i's earnings in \\$1 k\", ylabel = \"i's marginal tax\",foreground_color_legend = nothing, legend =:bottomright, xlim = [0,302])\n",
    "plot!(ygropt[2:nytopt]*100, [evaluate(spl, y1, ygropt[findlast(qygr .<= 0.5)]) for y1 in ygropt[2:nytopt]], label = \"-i's earnings percentile = 50\", line = (2.75,:blue, :dash))\n",
    "plot!(ygropt[2:nytopt]*100, [evaluate(spl, y1, ygropt[findlast(qygr .<= 0.9)]) for y1 in ygropt[2:nytopt]], label = \"-i's earnings percentile = 90\", line = (2.75,:black))\n",
    "plot!((y2d1N * pdf1*100)[2:end], (m2d1N * pdf1)[2:end], label=\"random matching\", line = (1.75,:gray,:dashdotdot))\n",
    "savefig(\"../output/fig5a_opttaxperc.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56b543fa-742e-4bda-b0a8-493b82affd40",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 5.(b)\n",
    "#plot spouse i marginal taxes for given ratios of spouse i and spouse -i earnings\n",
    "plot(y1d1[2:end]*100, m1d1[2:end], label=\"single\", line = (2.75,:purple,:dot))\n",
    "plot!(ygropt[2:nytopt]*100, [evaluate(spl, y1,  y1/2) for y1 in ygropt[2:nytopt]],  label = \"-i earns twice less\", ylim=[0,0.55], line = (2.75,:red, :dashdot), xlabel = \"i's earnings in \\$1 k\", ylabel = \"i's marginal tax\",foreground_color_legend = nothing, legend =:bottomright, xlim = [0,302])\n",
    "plot!(ygropt[2:nytopt]*100, [evaluate(spl, y1, y1) for y1 in ygropt[2:nytopt]], label = \"-i earns same\", line = (2.75,:blue, :dash))\n",
    "plot!(ygropt[2:nytopt]*100, [evaluate(spl, y1, y1*2) for y1 in ygropt[2:nytopt]], label = \"-i earns twice more\", line = (2.75,:black))\n",
    "plot!((y2d1N * pdf1*100)[2:end], (m2d1N * pdf1)[2:end], label=\"random matching\", line = (1.75,:gray))\n",
    "savefig(\"../output/fig5b_opttaxrel.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "189605da-928b-4716-8717-cc384cfd1c80",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 5.(c)\n",
    "#plot i's marginal rates implied by HSV taxes given spouse -i percentile = 0.1, 0.5, 0.9\n",
    "plot(ygr[2:nyt]*100,  mtaxf2.(ygr[2:nyt]), label=\"single\", line = (2.75,:purple,:dot))\n",
    "plot!(ygr[2:nyt]*100, mtaxf.(ygr[2:nyt] .+ uniq_indy[findlast(uniq_indy[:,2] .<= 0.1)]),  label = \"-i's earnings percentile = 10\",foreground_color_legend = nothing,  line = (2.5,:red, :dashdot), ylim=[-0.01,0.55], xlabel = \"i's earnings in \\$1 k\", ylabel = \"i's marginal tax\", legend =:bottomright, xlim = [0,302])\n",
    "plot!(ygr[2:nyt]*100,  mtaxf.(ygr[2:nyt] .+ uniq_indy[findlast(uniq_indy[:,2] .<= 0.5)]), label = \"-i's earnings percentile = 50\", line = (2.5,:blue, :dash))\n",
    "plot!(ygr[2:nyt]*100,  mtaxf.(ygr[2:nyt] .+ uniq_indy[findlast(uniq_indy[:,2] .<= 0.9)]), label = \"-i's earnings percentile = 90\", line = (2.5,:black))\n",
    "savefig(\"../output/fig5c_statusperc.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ab992a3-fa67-46c1-bc10-80f5460e8612",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 5.(d)\n",
    "#plot i's marginal rates implied by HSV taxes for given ratios of spouse i and spouse -i earnings\n",
    "plot(ygr[2:nyt]*100,  mtaxf2.(ygr[2:nyt]), label=\"single\", line = (2.75,:purple,:dot))\n",
    "plot!(ygr[2:nyt]*100, mtaxf.(ygr[2:nyt]*1.5),foreground_color_legend = nothing,   label = \"-i earns twice less\", ylim=[0,0.55], line = (2.5,:red, :dashdot), xlabel = \"i's earnings in \\$1 k\", ylabel = \"i's marginal tax\", legend =:bottomright, xlim = [0,302])\n",
    "plot!(ygr[2:nyt]*100,  mtaxf.(ygr[2:nyt]*2), label = \"-i earns same\", line = (2.5,:blue, :dash))\n",
    "plot!(ygr[2:nyt]*100,  mtaxf.(ygr[2:nyt]*3), label = \"-i earns twice more\", line = (2.5,:black))\n",
    "savefig(\"../output/fig5d_statusrel.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ded9d35-0833-4f5b-abc9-1ef53d9eda13",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 6\n",
    "plot(ygr*100, mtaxf.(ygr),  line = (2.75,:purple, :dot), label = \"U.S. tax\")\n",
    "plot!(ygr*100,spl.(ygr*(1-0.5),ygr*0.5)*(1-0.5)+spl.(ygr*(1-0.5),ygr*0.5)*0.5, label = \"secondary earner's share=1/2\", line = (2.75,:red, :dashdot), xlabel = \"family earnings in \\$1 k\", ylabel = \"family marginal tax\", foreground_color_legend = nothing, legend =:bottomright, xlim = [0,302]) \n",
    "plot!(ygr*100,spl.(ygr*(1-1/3),ygr*1/3)*(1-1/3)+spl.(ygr*(1-1/3),ygr*1/3)*1/3, line = (2.75,:blue, :dash), label = \"secondary earner's share=1/3\")\n",
    "plot!(ygr*100,spl.(ygr*(1-0),ygr*0)*(1-0)+spl.(ygr*(1-0),ygr*0)*0, line = (2.75,:black), label = \"secondary earner's share=0\")\n",
    "savefig(\"../output/fig6_opttaxshare.pdf\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 1.8.5",
   "language": "julia",
   "name": "julia-1.8"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
